# -*- coding: utf-8 -*-

'''
Basic HTML parser for fetched content: prints the href of every <a> tag.
'''

import string

try:
	from HTMLParser import HTMLParser  # Python 2
except ImportError:
	from html.parser import HTMLParser  # Python 3

class snHTMLParser(HTMLParser):
	'''
	HTML parser that reports the target of every hyperlink it encounters.

	For each <a> start tag, one line is printed to standard output per
	href attribute found. End tags and text content are ignored.
	'''

	def handle_starttag(self, tag, attrs):
		'''
		Print the href value(s) of an <a> start tag.

		tag   -- name of the tag that was opened
		attrs -- sequence of (name, value) attribute pairs; may be empty
		'''
		# str.lower() replaces the deprecated string.lower() function, which
		# is no longer available in Python 3. The comparison stays
		# case-insensitive so direct callers passing "A"/"HREF" still match.
		if tag.lower() != "a":
			return
		for name, value in attrs:
			if name.lower() == "href":
				# A single parenthesised argument gives the same output
				# whether print is a statement or a function.
				print("Encountered a start tag: %s %s" % (tag, value))

	def handle_endtag(self, tag):
		'''Ignore end tags; only <a> start tags are reported.'''

	def handle_data(self, data):
		'''Ignore text content; only <a> start tags are reported.'''
		

def test():
	'''
	Smoke test: feed a small HTML document to snHTMLParser.

	The sample contains one hyperlink so that the parser's link reporting
	is actually exercised; a document without any <a href> produces no
	output at all.
	'''
	parser = snHTMLParser()
	parser.feed(u'<html><head><title>Test</title></head>'
		u'<body><h1>Parse me!</h1>'
		u'<a href="http://example.com/">link</a></body></html>')
	# close() forces processing of any input still buffered by feed().
	parser.close()

# Run the smoke test only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == u"__main__":
	test()

